Integration¶

Scanorama (Hie et al., 2019)
GitHub
Tutorial external API
External API tutorial

A fix to run scran pooling normalization (computeSumFactors) in the current Python environment.

In [1]:
import scanpy as sc
import scanpy.external as sce

import numpy as np
import pandas as pd

import os
In [2]:
# Working directory: all relative paths used below are resolved against this folder.
# NOTE(review): absolute, user-specific path; adjust when running on another machine.
os.chdir(path='/research/peer/fdeckert/FD20200109SPLENO')
In [3]:
# rpy2: set R_HOME to the R installation under this project's conda environment
os.environ.update({'R_HOME': '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'})
In [4]:
# Plotting
# rpy2 is imported in this cell, after R_HOME has been set in the cell above
# (presumably so that rpy2 picks up that R installation; confirm before moving it).
import rpy2.robjects as robjects

# Source the R script; element 0 of the result is the container indexed below.
color_load = robjects.r.source('plotting_global.R')
r_palettes = color_load[0]

# Build a nested dict: entry name -> {level name -> first element of the R value}
color = {}
for idx in range(len(r_palettes)):
    r_entry = r_palettes[idx]
    color[r_palettes.names[idx]] = {level: r_entry.rx2(level)[0] for level in r_entry.names}

sc.set_figure_params(figsize=(5, 5))

Parameter¶

In [5]:
# Scanpy settings shared by every section below
n_comps, n_neighbors = 100, 50  # PCA components / neighbors passed to sc.pp.neighbors
dimred = n_comps                # passed as n_pcs to sc.pp.neighbors

Scanorama¶

In [6]:
# Read the stored object and continue with its .raw slot converted to an AnnData
adata = sc.read_h5ad('data/object/so_sct.h5ad').raw.to_adata()
In [7]:
def set_color(categories):
    """Apply the project colour palette to categorical columns of ``adata.obs``.

    Works on the notebook-level globals ``adata`` and ``color``. For every name
    in ``categories`` that is a column of ``adata.obs``:

    * the column is converted to a pandas categorical (unused categories are
      dropped),
    * its categories are ordered as they appear in ``color[name]``,
    * the matching colour values are written to ``adata.uns[name + '_colors']``
      in that same order.

    Parameters
    ----------
    categories : iterable of str
        Candidate column names. Names that are not columns of ``adata.obs``
        are skipped; every remaining name must be a key of ``color``.

    Raises
    ------
    ValueError
        If a column contains a value that has no entry in ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        palette = color[category]

        # Drop unused categories so that the category set equals the observed
        # values; stale categories would otherwise make reorder_categories fail.
        values = pd.Series(adata.obs[category], dtype='category')
        values = values.cat.remove_unused_categories()

        # Set membership instead of scanning a per-cell list for every key.
        present = set(values.cat.categories)
        keys = [key for key in palette if key in present]

        missing = present.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {sorted(map(str, missing))} in '{category}'"
            )

        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)
        
# Apply the palette to every category that has an entry in `color`
set_color(list(color))
In [8]:
# Normalise, log-transform and scale in place (in this order), then run PCA
for preprocess_step in (sc.pp.normalize_total, sc.pp.log1p, sc.pp.scale):
    preprocess_step(adata)
sc.tl.pca(adata, n_comps=n_comps)
In [9]:
# Batch label = treatment + replicate, joined as strings (e.g. 'NaClRep2' in the log below)
treatment_label = adata.obs['treatment'].astype(str)
replicate_label = adata.obs['sample_rep'].astype(str)
adata.obs['integrate'] = treatment_label + replicate_label

# Integrate the PCA embedding across batches; the result is read back later as 'X_scanorama'
sce.pp.scanorama_integrate(adata, basis='X_pca', key='integrate')
[[0.         0.73275236 0.70055677 0.45073832]
 [0.         0.         0.66035183 0.45872801]
 [0.         0.         0.         0.68003324]
 [0.         0.         0.         0.        ]]
Processing datasets NaClRep2 <=> NaClRep1
Processing datasets NaClRep2 <=> CpGRep2
Processing datasets CpGRep2 <=> CpGRep1
Processing datasets NaClRep1 <=> CpGRep2
Processing datasets NaClRep1 <=> CpGRep1
Processing datasets NaClRep2 <=> CpGRep1
In [10]:
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(adata, use_rep='X_scanorama', n_pcs=dimred, n_neighbors=n_neighbors)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata, min_dist=0.3)

# Plot: one UMAP panel per annotation, three panels per row
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'pHb_RNA', 'pRb_RNA', 'pMt_RNA',
        'nCount_RNA', 'nFeature_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

Scanorama (HVG 8000)¶

In [11]:
# Read the stored object, keep the gene list saved under uns['hvg_int_8000']
# (presumably the 8000 integration HVGs; confirm upstream), then switch to .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_8000 = [gene for gene in adata.uns['hvg_int_8000']]
adata = adata.raw.to_adata()
In [12]:
def set_color(categories):
    """Apply the project colour palette to categorical columns of ``adata.obs``.

    Works on the notebook-level globals ``adata`` and ``color``. For every name
    in ``categories`` that is a column of ``adata.obs``:

    * the column is converted to a pandas categorical (unused categories are
      dropped),
    * its categories are ordered as they appear in ``color[name]``,
    * the matching colour values are written to ``adata.uns[name + '_colors']``
      in that same order.

    Parameters
    ----------
    categories : iterable of str
        Candidate column names. Names that are not columns of ``adata.obs``
        are skipped; every remaining name must be a key of ``color``.

    Raises
    ------
    ValueError
        If a column contains a value that has no entry in ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        palette = color[category]

        # Drop unused categories so that the category set equals the observed
        # values; stale categories would otherwise make reorder_categories fail.
        values = pd.Series(adata.obs[category], dtype='category')
        values = values.cat.remove_unused_categories()

        # Set membership instead of scanning a per-cell list for every key.
        present = set(values.cat.categories)
        keys = [key for key in palette if key in present]

        missing = present.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {sorted(map(str, missing))} in '{category}'"
            )

        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)
        
# Apply the palette to every category that has an entry in `color`
set_color(list(color))
In [13]:
# Normalise, log-transform and scale all genes in place
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata)

# Keep only the genes in hvg_8000. Column slicing returns an AnnData *view*;
# copy it explicitly because the steps below write into the object
# (PCA results, adata.obs['integrate']), which should not rely on the
# implicit view-to-copy conversion.
adata = adata[:, hvg_8000].copy()
sc.tl.pca(adata, n_comps=n_comps)
In [14]:
# Batch label = treatment + replicate, joined as strings (e.g. 'NaClRep2' in the log below)
treatment_label = adata.obs['treatment'].astype(str)
replicate_label = adata.obs['sample_rep'].astype(str)
adata.obs['integrate'] = treatment_label + replicate_label

# Integrate the PCA embedding across batches; the result is read back later as 'X_scanorama'
sce.pp.scanorama_integrate(adata, basis='X_pca', key='integrate')
[[0.         0.72040668 0.74776083 0.46574679]
 [0.         0.         0.6011502  0.41441137]
 [0.         0.         0.         0.71514648]
 [0.         0.         0.         0.        ]]
Processing datasets NaClRep2 <=> CpGRep2
Processing datasets NaClRep2 <=> NaClRep1
Processing datasets CpGRep2 <=> CpGRep1
Processing datasets NaClRep1 <=> CpGRep2
Processing datasets NaClRep2 <=> CpGRep1
Processing datasets NaClRep1 <=> CpGRep1
In [15]:
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(adata, use_rep='X_scanorama', n_pcs=dimred, n_neighbors=n_neighbors)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata, min_dist=0.3)

# Plot: one UMAP panel per annotation, three panels per row
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'pHb_RNA', 'pRb_RNA', 'pMt_RNA',
        'nCount_RNA', 'nFeature_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

Scanorama (HVG 6000)¶

In [16]:
# Read the stored object, keep the gene list saved under uns['hvg_int_6000']
# (presumably the 6000 integration HVGs; confirm upstream), then switch to .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_6000 = [gene for gene in adata.uns['hvg_int_6000']]
adata = adata.raw.to_adata()
In [17]:
def set_color(categories):
    """Apply the project colour palette to categorical columns of ``adata.obs``.

    Works on the notebook-level globals ``adata`` and ``color``. For every name
    in ``categories`` that is a column of ``adata.obs``:

    * the column is converted to a pandas categorical (unused categories are
      dropped),
    * its categories are ordered as they appear in ``color[name]``,
    * the matching colour values are written to ``adata.uns[name + '_colors']``
      in that same order.

    Parameters
    ----------
    categories : iterable of str
        Candidate column names. Names that are not columns of ``adata.obs``
        are skipped; every remaining name must be a key of ``color``.

    Raises
    ------
    ValueError
        If a column contains a value that has no entry in ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        palette = color[category]

        # Drop unused categories so that the category set equals the observed
        # values; stale categories would otherwise make reorder_categories fail.
        values = pd.Series(adata.obs[category], dtype='category')
        values = values.cat.remove_unused_categories()

        # Set membership instead of scanning a per-cell list for every key.
        present = set(values.cat.categories)
        keys = [key for key in palette if key in present]

        missing = present.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {sorted(map(str, missing))} in '{category}'"
            )

        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)
        
# Apply the palette to every category that has an entry in `color`
set_color(list(color))
In [18]:
# Normalise, log-transform and scale all genes in place
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata)

# Keep only the genes in hvg_6000. Column slicing returns an AnnData *view*;
# copy it explicitly because the steps below write into the object
# (PCA results, adata.obs['integrate']), which should not rely on the
# implicit view-to-copy conversion.
adata = adata[:, hvg_6000].copy()
sc.tl.pca(adata, n_comps=n_comps)
In [19]:
# Batch label = treatment + replicate, joined as strings (e.g. 'NaClRep2' in the log below)
treatment_label = adata.obs['treatment'].astype(str)
replicate_label = adata.obs['sample_rep'].astype(str)
adata.obs['integrate'] = treatment_label + replicate_label

# Integrate the PCA embedding across batches; the result is read back later as 'X_scanorama'
sce.pp.scanorama_integrate(adata, basis='X_pca', key='integrate')
[[0.         0.70152505 0.76615832 0.47857662]
 [0.         0.         0.58964817 0.42320704]
 [0.         0.         0.         0.71036775]
 [0.         0.         0.         0.        ]]
Processing datasets NaClRep2 <=> CpGRep2
Processing datasets CpGRep2 <=> CpGRep1
Processing datasets NaClRep2 <=> NaClRep1
Processing datasets NaClRep1 <=> CpGRep2
Processing datasets NaClRep2 <=> CpGRep1
Processing datasets NaClRep1 <=> CpGRep1
In [20]:
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(adata, use_rep='X_scanorama', n_pcs=dimred, n_neighbors=n_neighbors)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata, min_dist=0.3)

# Plot: one UMAP panel per annotation, three panels per row
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'pHb_RNA', 'pRb_RNA', 'pMt_RNA',
        'nCount_RNA', 'nFeature_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

Scanorama (HVG 4000)¶

In [21]:
# Read the stored object, keep the gene list saved under uns['hvg_int_4000']
# (presumably the 4000 integration HVGs; confirm upstream), then switch to .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_4000 = [gene for gene in adata.uns['hvg_int_4000']]
adata = adata.raw.to_adata()
In [22]:
def set_color(categories):
    """Apply the project colour palette to categorical columns of ``adata.obs``.

    Works on the notebook-level globals ``adata`` and ``color``. For every name
    in ``categories`` that is a column of ``adata.obs``:

    * the column is converted to a pandas categorical (unused categories are
      dropped),
    * its categories are ordered as they appear in ``color[name]``,
    * the matching colour values are written to ``adata.uns[name + '_colors']``
      in that same order.

    Parameters
    ----------
    categories : iterable of str
        Candidate column names. Names that are not columns of ``adata.obs``
        are skipped; every remaining name must be a key of ``color``.

    Raises
    ------
    ValueError
        If a column contains a value that has no entry in ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        palette = color[category]

        # Drop unused categories so that the category set equals the observed
        # values; stale categories would otherwise make reorder_categories fail.
        values = pd.Series(adata.obs[category], dtype='category')
        values = values.cat.remove_unused_categories()

        # Set membership instead of scanning a per-cell list for every key.
        present = set(values.cat.categories)
        keys = [key for key in palette if key in present]

        missing = present.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {sorted(map(str, missing))} in '{category}'"
            )

        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)
        
# Apply the palette to every category that has an entry in `color`
set_color(list(color))
In [23]:
# Normalise, log-transform and scale all genes in place
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata)

# Keep only the genes in hvg_4000. Column slicing returns an AnnData *view*;
# copy it explicitly because the steps below write into the object
# (PCA results, adata.obs['integrate']), which should not rely on the
# implicit view-to-copy conversion.
adata = adata[:, hvg_4000].copy()
sc.tl.pca(adata, n_comps=n_comps)
In [24]:
# Batch label = treatment + replicate, joined as strings (e.g. 'NaClRep2' in the log below)
treatment_label = adata.obs['treatment'].astype(str)
replicate_label = adata.obs['sample_rep'].astype(str)
adata.obs['integrate'] = treatment_label + replicate_label

# Integrate the PCA embedding across batches; the result is read back later as 'X_scanorama'
sce.pp.scanorama_integrate(adata, basis='X_pca', key='integrate')
[[0.         0.67634955 0.7862503  0.4839022 ]
 [0.         0.         0.58186739 0.42016238]
 [0.         0.         0.         0.69686266]
 [0.         0.         0.         0.        ]]
Processing datasets NaClRep2 <=> CpGRep2
Processing datasets CpGRep2 <=> CpGRep1
Processing datasets NaClRep2 <=> NaClRep1
Processing datasets NaClRep1 <=> CpGRep2
Processing datasets NaClRep2 <=> CpGRep1
Processing datasets NaClRep1 <=> CpGRep1
In [25]:
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(adata, use_rep='X_scanorama', n_pcs=dimred, n_neighbors=n_neighbors)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata, min_dist=0.3)

# Plot: one UMAP panel per annotation, three panels per row
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'pHb_RNA', 'pRb_RNA', 'pMt_RNA',
        'nCount_RNA', 'nFeature_RNA',
    ],
    ncols=3,
    wspace=0.5,
)

Scanorama (HVG 2000)¶

In [26]:
# Read the stored object, keep the gene list saved under uns['hvg_int_2000']
# (presumably the 2000 integration HVGs; confirm upstream), then switch to .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_2000 = [gene for gene in adata.uns['hvg_int_2000']]
adata = adata.raw.to_adata()
In [27]:
def set_color(categories):
    """Apply the project colour palette to categorical columns of ``adata.obs``.

    Works on the notebook-level globals ``adata`` and ``color``. For every name
    in ``categories`` that is a column of ``adata.obs``:

    * the column is converted to a pandas categorical (unused categories are
      dropped),
    * its categories are ordered as they appear in ``color[name]``,
    * the matching colour values are written to ``adata.uns[name + '_colors']``
      in that same order.

    Parameters
    ----------
    categories : iterable of str
        Candidate column names. Names that are not columns of ``adata.obs``
        are skipped; every remaining name must be a key of ``color``.

    Raises
    ------
    ValueError
        If a column contains a value that has no entry in ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        palette = color[category]

        # Drop unused categories so that the category set equals the observed
        # values; stale categories would otherwise make reorder_categories fail.
        values = pd.Series(adata.obs[category], dtype='category')
        values = values.cat.remove_unused_categories()

        # Set membership instead of scanning a per-cell list for every key.
        present = set(values.cat.categories)
        keys = [key for key in palette if key in present]

        missing = present.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {sorted(map(str, missing))} in '{category}'"
            )

        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)
        
# Apply the palette to every category that has an entry in `color`
set_color(list(color))
In [28]:
# Normalise, log-transform and scale all genes in place
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata)

# Keep only the genes in hvg_2000. Column slicing returns an AnnData *view*;
# copy it explicitly because the steps below write into the object
# (PCA results, adata.obs['integrate']), which should not rely on the
# implicit view-to-copy conversion.
adata = adata[:, hvg_2000].copy()
sc.tl.pca(adata, n_comps=n_comps)
In [29]:
# Batch label = treatment + replicate, joined as strings (e.g. 'NaClRep2' in the log below)
treatment_label = adata.obs['treatment'].astype(str)
replicate_label = adata.obs['sample_rep'].astype(str)
adata.obs['integrate'] = treatment_label + replicate_label

# Integrate the PCA embedding across batches; the result is read back later as 'X_scanorama'
sce.pp.scanorama_integrate(adata, basis='X_pca', key='integrate')
[[0.         0.71532317 0.81215202 0.46671508]
 [0.         0.         0.62483085 0.41136671]
 [0.         0.         0.         0.72574278]
 [0.         0.         0.         0.        ]]
Processing datasets NaClRep2 <=> CpGRep2
Processing datasets CpGRep2 <=> CpGRep1
Processing datasets NaClRep2 <=> NaClRep1
Processing datasets NaClRep1 <=> CpGRep2
Processing datasets NaClRep2 <=> CpGRep1
Processing datasets NaClRep1 <=> CpGRep1
In [30]:
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(adata, use_rep='X_scanorama', n_pcs=dimred, n_neighbors=n_neighbors)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata, min_dist=0.3)

# Plot: one UMAP panel per annotation, three panels per row
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment',
        'label_fine_haemosphere', 'sample_rep', 'cc_phase_class',
        'pHb_RNA', 'pRb_RNA', 'pMt_RNA',
        'nCount_RNA', 'nFeature_RNA',
    ],
    ncols=3,
    wspace=0.5,
)